
********************************************************************
***** DATA CREATION:  GENDER AND COLLABORATION (LORENZO DUCTOR, SANJEEV GOYAL AND ANJA PRUMMER)
***** CREATED BY: Marco J. van der Leij 
***** OBJECTIVE: Create a predicted, time-invariant journal quality index
********************************************************************


* --- Session setup -------------------------------------------------
* Start from an empty dataset and make sure no log from an earlier
* (possibly aborted) run is still open before opening a new one.
clear
capture log close

* Explicit memory allocation, as needed by older Stata releases.
set memory 100m

log using journals.log, replace

* Load the raw journal table; variable names come from the file header.
insheet using journals.txt

* --- Tinbergen Institute Index -------------------------------------
* Translate the letter ranking into a numeric score:
*   AA -> 4, A -> 3, B -> 2, anything else (including blank) -> 1.
generate tii = cond(ranking == "AA", 4, cond(ranking == "A", 3, cond(ranking == "B", 2, 1)))
label variable tii "Tinbergen Institute Index"

* The raw ranking text and the free-form comment column are no longer needed.
drop ranking comment


* --- Tidy variable names and convert text columns to numbers --------
* Shorten the two imported index names to kyjournal / kyarticle
* (their variable labels are assigned elsewhere in this script).
rename kodr~limpact kyjournal
rename kodr~eimpact kyarticle

* Characters per page: parse the text column via a scratch variable;
* real() yields missing for any non-numeric entry.
rename characters~e _rawtxt
generate charpage = real(_rawtxt)
drop _rawtxt

* Number of pages: same treatment.
rename numberofpa~s _rawtxt
generate npages = real(_rawtxt)
drop _rawtxt

rename numberofar~s narticle
rename immediacyi~x immediacy

* Cited / citing HL columns: the top-coded text ">10.0" is replaced by
* "10.0" so that it parses as a number; then convert to numeric.
foreach hl in cited citing {
	replace `hl'hl = "10.0" if `hl'hl == ">10.0"
	generate `hl' = real(`hl'hl)
}
drop citedhl citinghl

* --- Variable labels ------------------------------------------------
* Identification
label variable id        "ID code of journal"
label variable journal   "Journal name"
label variable economics "Dummy=1 if economic journal"
label variable since70s  "Journal published since the 1970s"
label variable repeated  "Journal appears twice in database"

* Kodrzycki&Yu indices
label variable kyjournal "Kodrzycki&Yu index by journal impact"
label variable kyarticle "Kodrzycki&Yu index by article impact"

* Citation statistics
label variable immediacy "Immediacy index"
label variable cited     "Cited HL"
label variable citing    "Citing HL"

* Size measures
* NOTE(review): "article" relies on Stata's variable-name abbreviation;
* presumably it resolves to the variable used as "articles" in log(articles)
* elsewhere in this script -- confirm, and note it fails under
* "set varabbrev off".
label variable article   "Number of articles per year"
label variable narticle  "Number of articles (alternative source)"
label variable npages    "Number of pages"
label variable charpage  "Characters per page"

* --- Fill gaps and build regressors ---------------------------------
* Missing impact factor is set to 0 and missing total cites to 1
* (so that its log below is 0); a missing "repeated" flag is set to 0.
replace impactfactor = 0 if impactfactor == .
replace totalcites   = 1 if totalcites == .
replace repeated     = 0 if repeated == .

* Log of each Kodrzycki&Yu index; the 0.005 offset keeps zero values defined.
foreach k in journal article {
	generate l`k' = ln(ky`k' + 0.005)
}

* Log citation count and log article count.
generate lcites = ln(totalcites)
generate lnart  = ln(articles)

* Interactions with the economics dummy.
generate ecites  = economics * lcites
generate efactor = economics * impactfactor
generate enart   = economics * lnart

* --- Predict the quality indices where they are not observed --------
summarize

* Regressor sets, from the smallest specification to the richest one.
local base  i.tii economics
local bibl  lnart enart lcites ecites impactfactor efactor
local extra cited citing immediacy since70s

foreach dep in kyjournal kyarticle ljournal larticle {
	// Disabled exploratory correlations, kept for reference:
	//	foreach j in cited citing immediacy impactfactor narticle npages since70s lcites tii {
	//	corr `i' `j'
	//	}

	// s: ranking dummies and the economics dummy only
	xi: regress `dep' `base'
	predict s`dep'

	// r: adds article count, cites and impact factor with their economics interactions
	xi: regress `dep' `bibl' `base'
	predict r`dep'

	// m: additionally uses cited, citing, immediacy and since70s
	xi: regress `dep' `bibl' `extra' `base'
	predict m`dep'

	// Start from the observed value; where it is missing fall back to the
	// richest available fitted value (m first, then r, then s).
	generate p`dep' = `dep'
	foreach fit in m r s {
		replace p`dep' = `fit'`dep' if p`dep' == .
	}

	drop r`dep' s`dep' m`dep'
}

* Remove the helper regressors (capture: do not stop if any is absent).
capture drop ljournal larticle lcites ecites efactor lnart enart

* --- Label the predicted indices, clean up and save -----------------
* Journal-impact versions (level and log)
label variable pkyjournal "Predicted Kodrzycki&Yu index by journal impact"
label variable pljournal  "Predicted Log of Kodrzycki&Yu index by journal impact"

* Article-impact versions (level and log)
label variable pkyarticle "Predicted Kodrzycki&Yu index by article impact"
label variable plarticle  "Predicted Log of Kodrzycki&Yu index by article impact"

summarize

* Drop the _Itii* indicator variables (the "xi:" expansion of i.tii).
drop _Itii*

* Write the final dataset (journal.dta) and close the log.
save journal, replace

log close

